Setup

Benthic

Point cover

Data source: “BenthicPointCoverScaledByTransect.xlsx” (scaled to not include non-reef substrate)

Import + wrangle data

benthic_var <- c("lc", "cca", "ta", "tas", "fma", "cma", "cyan", "ainv", "peys")

benthic_cover_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "BenthicPointCoverScaled", "BenthicPointCoverScaledByTransect.xlsx"), sheet = "Data") %>% 
  clean_names() %>%
  mutate(year = year(date),
         trans = as.character(trans), # to bind with NPA data - some trans numbers include surveyor initials
         t_tas = tas + tam) %>% 
  select(site, year, trans, benthic_var) %>%
  mutate(across(benthic_var, ~ . / 100)) %>%
  bind_rows(
    bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "Overall") %>%
                clean_names() %>%
                mutate(`transect_name` = as.character(`transect_name`)) %>%  # to match NPA with surv initials
                left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "tTA") %>%
                            clean_names() %>%
                            select(transect_id, ta, sta, tas, tam)),
              read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "Overall") %>%
                clean_names() %>%
                left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "tTA") %>%
                            clean_names() %>%
                            select(transect_id, ta, sta, tas, tam))) %>%
      mutate(year = 2025,
             survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name),
             tas_sum = tas + sta + tam) %>%
      select(site = survey_name, year, trans = transect_name, lc = t_coral, cca = t_cca, ta, tas = tas_sum, fma = t_fma, cma = t_cma, cyan = t_cyan, ainv = t_ainv, peys = t_pey)) %>%
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA"))) %>%
  filter(year != 2022) %>% # 2022 was incomplete data collection year for NPA
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

benthic_cover_site <- benthic_cover_trans %>%
  group_by(site, site_cat, year) %>%
  rename_with(~ str_remove(.x, "t_")) %>%
  summarise(across(c(benthic_var),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        sd   = ~ sd(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

benthic_cover_year <- benthic_cover_site %>%
  select(!contains("_se")) %>%
  rename_with(~ str_remove(.x, "_mean")) %>%
  group_by(year, site_cat) %>%
  summarise(across(c(benthic_var),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        sd   = ~ sd(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

# write.csv(benthic_cover_year, here("agrra_monitoring", "data_outputs", "benthic_cover_year.csv"))

Line plots by site

ggplot(benthic_cover_site, aes(x = year, y = lc_mean, group = site, shape = site)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Year", y = "Live Coral Cover", color = "Site", group = "Site") +
  theme_minimal()

ggplot(benthic_cover_site, aes(x = year, y = fma_mean, group = site, shape = site)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Year", y = "FMA Cover", color = "Site", group = "Site") +
  theme_minimal()

ggplot(benthic_cover_site, aes(x = year, y = ta_mean, group = site, shape = site)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Year", y = "TA Cover", color = "Site", group = "Site") +
  theme_minimal()

ggplot(benthic_cover_site, aes(x = year, y = tas_mean, group = site, shape = site)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Year", y = "TA/Sediment Cover", color = "Site", group = "Site") +
  theme_minimal()

Box plots by year

make_bp <- function(df, site_name, yvar, ylab,
                          y_max = NULL, test_method = "wilcox.test",
                          show_ns = FALSE, remove_x_text = FALSE,
                          title = TRUE, remove_y_text = FALSE) {

  df_sub <- df %>%
    filter(site_cat == site_name) %>%
    mutate(year = factor(as.character(year), levels = c("2017", "2019", "2025")))

  yrs <- sort(unique(as.character(df_sub$year)))
  comps <- if (length(yrs) >= 2) combn(yrs, 2, simplify = FALSE) else list()

  yvar_sym <- rlang::sym(yvar)

  p <- ggplot(df_sub, aes(x = year, y = !!yvar_sym)) +
    geom_boxplot(outlier.size = 1, width = 0.6) +
    geom_jitter(width = 0.15, size = 1, alpha = 0.6) +
    scale_y_continuous(labels = label_percent()) +
    labs(x = "Year", 
         y = if (!remove_y_text) ylab else NULL,
         title = if (title) site_name else NULL) +
    theme_pub() +
    theme(
      plot.title = element_text(size = 14, face = "bold", hjust = 0.5)
    )

  if (remove_y_text) {
    p <- p + theme(
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.title.y = element_blank()
    )
  }

  if (remove_x_text) {
    p <- p + theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank()
    )
  }

  if (!is.null(y_max)) {
    p <- p + coord_cartesian(ylim = c(0, y_max))
  }

  if (length(comps) > 0) {
    p <- p + stat_compare_means(
      method = test_method,
      comparisons = comps,
      label = "p.signif",
      hide.ns = !show_ns,
      vjust = 0
    )
  }

  p
}


p1 <- make_bp(benthic_cover_site, "NEMMA",
                    yvar = "lc_mean", ylab = "Live Coral",
                    y_max = 0.40,
                    remove_x_text = TRUE,
                    title = TRUE,
                    remove_y_text = FALSE)

p2 <- make_bp(benthic_cover_site, "NDNP",
                    yvar = "lc_mean", ylab = "Live Coral",
                    y_max = 0.40,
                    remove_x_text = TRUE,
                    title = TRUE,
                    remove_y_text = TRUE)

p3 <- make_bp(benthic_cover_site, "NEMMA",
                    yvar = "fma_mean", ylab = "Fleshy Macroalgae",
                    y_max = 0.80,
                    title = FALSE,
                    remove_y_text = FALSE)

p4 <- make_bp(benthic_cover_site, "NDNP",
                    yvar = "fma_mean", ylab = "Fleshy Macroalgae",
                    y_max = 0.80,
                    title = FALSE,
                    remove_y_text = TRUE)

final_plot <- (p1 | p2) / (p3 | p4)
  # + plot_annotation(tag_levels = "A")  # optional panel labels A, B, C, D

final_plot

ggsave(here("agrra_monitoring", "figs", "lc_fma_cover.png"), plot = final_plot, width = 8, height = 6)

Radar plots

NEMMA

max_pc <- benthic_cover_year %>%
  filter(site_cat == "NEMMA") %>%
  select(contains("_mean")) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(benthic_cover_year %>%
                  filter(site_cat == "NEMMA") %>%
                  select(contains("_mean")))

benthic_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        benthic_cover_year %>%
          filter(site_cat == "NEMMA") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean")) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(benthic_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(benthic_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

NDNP

max_pc <- benthic_cover_year %>%
  filter(site_cat == "NDNP") %>%
  select(contains("_mean")) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(benthic_cover_year %>%
                  filter(site_cat == "NDNP") %>%
                  select(contains("_mean")))


benthic_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        benthic_cover_year %>%
          filter(site_cat == "NDNP") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean")) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(benthic_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(benthic_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

NMDS

Removing cyanobacteria here due to seasonality and peysonnelids in NDNP due to lack in confidence about identification

nmds_var <- c("lc", "cca", "ta", "tas", "fma", "cma", "ainv")

benthic_nmds <- benthic_cover_site %>%
  rename_with(~ gsub("_mean", "", .x)) %>%
  mutate(period = ifelse(year == "2025", "2025", "2017-2019")) %>%
  select(site, site_cat, period, nmds_var) %>%
  ungroup()

benthic_meta <- benthic_nmds %>%
  ungroup() %>%
  select(site, site_cat, period)

benthic_pc <- benthic_nmds %>%
  ungroup() %>%
  select(-site, -site_cat, -period)

set.seed(123)
benthic_nmds_ord <- metaMDS(
  benthic_pc,
  distance = "bray",
  k = 2,
  trymax = 100,
  trace = 0
)

site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
  as.data.frame() %>%
  mutate(site     = benthic_meta$site,
         site_cat = benthic_meta$site_cat,
         period     = benthic_meta$period)

pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
  as.data.frame() %>%
  rownames_to_column(var = "category")

arrow_mult <- 2

pc_scores_scaled <- pc_scores %>%
  mutate(
    NMDS1 = NMDS1 * arrow_mult,
    NMDS2 = NMDS2 * arrow_mult,
    label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
    label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
  )

p <- ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = period)) +
  geom_point(size = 1, alpha = 0.4) +
  stat_ellipse(aes(group = period), type = "t", linetype = 2) +
  facet_wrap(~ site_cat) +
  # geom_segment(data = pc_scores_scaled,
  #              aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
  #              arrow = arrow(length = unit(0.3, "cm")),
  #              color = "black",
  #              inherit.aes = FALSE) +
  # geom_text(data = pc_scores_scaled,
  #           aes(x = NMDS1 + label_nudge_x,
  #               y = NMDS2 + label_nudge_y,
  #               label = category),
  #           color = "black", size = 3, inherit.aes = FALSE) +
  theme_pub() +
  labs(color = "Year") +
  theme(strip.text = element_text(size = 14, face = "bold"),
        legend.position = "bottom")
p

ggsave(plot = p, here("agrra_monitoring", "figs", "benthic_nmds.png"), width = 8, height = 5)
# PERMANOVA

### need to fix code so it distinguishes between year and site_cat in the model
set.seed(123)
adonis_res <- adonis2(
  as.matrix(benthic_nmds[, nmds_var]) ~ site_cat + period,
  data = benthic_nmds,
  permutations = 999,
  method = "bray"
)

permanova_table <- adonis_res %>%
  as.data.frame() %>%
  rownames_to_column("Factor") %>%
  filter(Factor != "Residual") %>%     # usually not shown
  select(Factor, Df, R2, F, `Pr(>F)`) %>%
  rename(p = `Pr(>F)`) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))
permanova_table
##   Factor Df    R2      F     p
## 1  Model  2 0.539 23.985 0.001
## 2  Total 43 1.000     NA    NA
ft <- flextable(permanova_table) |>
  autofit()

read_docx() |>
  body_add_flextable(ft) |>
  print(target = here("agrra_monitoring", "figs", "benthic_nmds_permanova.docx"))
# envfit

full_names <- c(
  lc  = "Live coral",
  ta  = "Turf algae",
  tas = "Turf algae – sediment",
  fma = "Fleshy macroalgae",
  cma = "Calcareous macroalgae",
  ainv = "Aggressive invertebrates",
  cca  = "Crustose coralline algae"
)

env_table <- envfit(benthic_nmds_ord, benthic_pc, permutations = 999) %>%
  (\(.) {
    arrows <- as.data.frame(.$vectors$arrows) %>%
      rownames_to_column("code")

    stats <- tibble(
      code = rownames(.$vectors$arrows),
      r2 = .$vectors$r,
      p = .$vectors$pvals
    )

    arrows %>%
      left_join(stats, by = "code") %>%
      mutate(
        category = full_names[code]
      ) %>%
      select(category, code, NMDS1, NMDS2, r2, p)
  })() %>%
  filter(p < 0.05) %>% # keep only significant variables
  arrange(p, desc(r2)) %>% # order by sig, then by r2
  mutate(across(
    where(is.numeric),
    ~ round(.x, 3)
  ))

# export table
library(gt)

gt_table <- env_table %>%
  select(-code) %>%   # remove code column
  gt() %>%
  fmt_number(
    columns = where(is.numeric),
    decimals = 3
  ) %>%
  cols_align(
    align = "center",
    columns = everything()
  ) %>%
  cols_label(
    category = "Category",
    NMDS1 = "NMDS1",
    NMDS2 = "NMDS2",
    r2 = "R²",
    p = "p-value"
  )

gtsave(gt_table, here("agrra_monitoring", "figs", "benthic_nmds_envfit.docx"))
#### NEMMA only
benthic_nmds <- benthic_cover_site %>%
  filter(site_cat == "NEMMA") %>%
  select(-site_cat, 
         -contains("_sd"), 
         -contains("cyan"),
         -contains("peys")) %>%
  rename_with(~ gsub("_mean", "", .x)) %>%
  mutate(year = as.character(year))

# Separate metadata and community data
benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc   <- benthic_nmds %>% ungroup %>% select(-site, -year)

# Run NMDS
set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)

# Extract NMDS scores and add metadata
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
  as.data.frame() %>%
  mutate(site = benthic_meta$site,
         year = benthic_meta$year)

# Extract pc (community variable) scores and add metadata
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
  as.data.frame() %>%
  rownames_to_column(var = "category")

# Plot NMDS

## defining NMDS arrows
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
  mutate(NMDS1 = NMDS1 * arrow_mult,
         NMDS2 = NMDS2 * arrow_mult)

## adding offset columns to move arrow labels
pc_scores_scaled <- pc_scores_scaled %>%
  mutate(
    label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
    label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
  )

## plot
ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year)) +
  geom_point(size = 4) +
  scale_shape_manual(values = 0:25) +
  stat_ellipse(aes(group = year), type = "t", linetype = 2) +
  geom_segment(data = pc_scores_scaled,
               aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
               arrow = arrow(length = unit(0.3, "cm")),
               color = "black",
               inherit.aes = FALSE) +
  geom_text(data = pc_scores_scaled,
            aes(x = NMDS1 + label_nudge_x,
                y = NMDS2 + label_nudge_y,
                label = category),
            color = "black",
            inherit.aes = FALSE) +
  theme_minimal()

# PERMANOVA (adonis2)
set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year, 
                      data = benthic_meta, 
                      method = "bray", 
                      permutations = 999)

print(adonis_res)
#### NDNP only
benthic_nmds <- benthic_cover_site %>%
  filter(site_cat == "NDNP") %>%
  select(-site_cat, 
         -contains("_sd"), 
         -contains("cyan"),
         -contains("peys")) %>%
  rename_with(~ gsub("_mean", "", .x)) %>%
  mutate(year = as.character(year))

# Separate metadata and community data
benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc   <- benthic_nmds %>% ungroup %>% select(-site, -year)

# Run NMDS
set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)

# Extract NMDS scores and add metadata
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
  as.data.frame() %>%
  mutate(site = benthic_meta$site,
         year = benthic_meta$year)

# Extract pc (community variable) scores and add metadata
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
  as.data.frame() %>%
  rownames_to_column(var = "category")

# Plot NMDS

## defining NMDS arrows
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
  mutate(NMDS1 = NMDS1 * arrow_mult,
         NMDS2 = NMDS2 * arrow_mult)

## adding offset columns to move arrow labels
pc_scores_scaled <- pc_scores_scaled %>%
  mutate(
    label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
    label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
  )

## plot
ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year)) +
  geom_point(size = 4) +
  stat_ellipse(aes(group = year), type = "t", linetype = 2) +
  geom_segment(data = pc_scores_scaled,
               aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
               arrow = arrow(length = unit(0.3, "cm")),
               color = "black",
               inherit.aes = FALSE) +
  geom_text(data = pc_scores_scaled,
            aes(x = NMDS1 + label_nudge_x,
                y = NMDS2 + label_nudge_y,
                label = category),
            color = "black",
            inherit.aes = FALSE) +
  theme_minimal()

# PERMANOVA (adonis2)
set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year, 
                      data = benthic_meta, 
                      method = "bray", 
                      permutations = 999)

print(adonis_res)

Site 12 is skewing this quite a bit - but don’t see reason to omit?

Coral species composition

Data overview:

  • 2017: “CoralCoverSpeciesScaledBySite”
    • One sheet (‘Data’) with percent cover by species
  • 2025: BenthicCoralCoverScaledByTransect.xlsx”
    • Separate sheets per family give percent cover by species - inconvenient format, need to bind all together

Import + wrangle data

# metadata for species - family matching
coral_meta <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx"), sheet = "Metadata") %>% 
  clean_names() %>%
  fill(scaled_coral_cover, column_header) 

coral_meta_fam <- coral_meta %>%
  filter(str_starts(column_header, "t") | column_header == "UNKN") %>%
  select(family_code = column_header, family = definition) %>%
  mutate(family = gsub("\\s*\\([^\\)]+\\)", "", family)) %>% # removing fluff
  distinct() # removing UNKN duplicate

coral_meta_spp <- coral_meta %>%
  filter(str_starts(scaled_coral_cover, "t") | scaled_coral_cover == "UNKN") %>%
  select(species_code = column_header, family_code = scaled_coral_cover, species = definition) %>%
  left_join(coral_meta_fam, by = "family_code") %>%
  mutate(genus = sub(" .*", "", species),
         family_code = str_remove(family_code, "^t"),
         across(c(species, family), ~ str_replace(., "Unknown sp. of Live Coral", "Unknown spp.")))

# historical data
coral_spp_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralCoverSpeciesScaled", "CoralCoverSpeciesScaledBySite.xlsx"), sheet = "Data") %>% 
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(year %in% c(2017, 2019)) %>% # 2022 was incomplete NPA year
  pivot_longer(
    cols = matches("^[a-z]{4}_(avg|std)$"),   # all columns like acer_ave, apal_std, etc.
    names_to = c("species_code", ".value"),        # split into species and measurement type
    names_sep = "_"
  ) %>%
  mutate(across(c(avg, std), ~ .x / 100)) %>% # to match 2025 format (fraction of 1)
  select(site, year, species_code, avg, std) 

# 2025 data
file_coral_nemma <- here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx")
file_coral_npa <- here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx")

clean_excel_coral <- function(file_path) {
  sheets <- excel_sheets(file_path)
  sheets_to_join <- setdiff(sheets, c("TermsOfUse", "Metadata", "Overall"))  # exclude

  sheets_list <- map(sheets_to_join, ~ read_excel(file_path, sheet = .x) %>%
                       rename_with(~ gsub("^(t)([A-Za-z]{4})(avg|med|std)$", "\\1_\\2_\\3", .x, ignore.case = TRUE)) %>%
                       rename_with(~ gsub("([A-Za-z]{4})(avg|med|std)$", "\\1_\\2", .x, ignore.case = TRUE)) %>%
                       clean_names())

  reduce(sheets_list, left_join, by = c("survey_id", "code", "name", "nt"))
}

coral_spp_2025 <- clean_excel_coral(file_coral_npa) %>%
  bind_rows(clean_excel_coral(file_coral_nemma)) %>%
  mutate(year = 2025) %>%
  rename(site = name) %>%
  pivot_longer(
    cols = matches("^[a-z]{4}_(avg|std)$"),   # all columns like acer_ave, apal_std, etc.
    names_to = c("species_code", ".value"),        # split into species and measurement type
    names_sep = "_"
  ) %>%
  select(site, year, species_code, avg, std)

# merge for complete df
coral_spp_site <- bind_rows(coral_spp_hist, coral_spp_2025) %>%
  mutate(site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site)) %>%
  mutate(species_code = toupper(species_code),
         site = if_else(site == "Little Bird Patch", "Little Bird Backreef", site)) %>%
  left_join(coral_meta_spp, by = "species_code")  %>%
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

coral_spp_year <- coral_spp_site %>%
  group_by(year, site_cat, family, genus, species_code, species) %>%
  summarize(mean = mean(avg), 
            sd = sd(avg)) %>%
  group_by(year, site_cat) %>%
  mutate(rel_mean = mean / sum(mean)) %>%
  group_by(year, site_cat, species) %>%
  filter(sum(mean) > 0) %>% # removing species that were 0 in all years/site_cats
  ungroup()

coral_fam_year <- coral_spp_year %>%
  group_by(year, site_cat, family) %>%
  summarize(mean = sum(mean)) %>%
  group_by(year, site_cat) %>%
  mutate(rel_mean = mean/sum(mean)) %>%
  group_by(year, site_cat) %>%
  complete(
    family = unique(coral_spp_year$family),
    fill = list(mean = 0, rel_mean = 0)
  ) %>%
  ungroup()

coral_fam_year_wide <- coral_fam_year %>%
  pivot_wider(names_from = family,
              values_from = c(mean, rel_mean),
              names_glue = "{family}_{.value}")

Line plots

chk <- coral_fam_year %>%
  group_by(year, site_cat) %>%
  summarize(tot = sum(rel_mean))

ggplot(coral_fam_year, aes(x = year, y = mean, group = site_cat, color = site_cat)) +
  geom_point() +
  geom_line() + 
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Year", y = "Mean percent cover", color = "Location") +
  facet_wrap(~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(coral_fam_year, aes(x = year, y = rel_mean, group = site_cat, color = site_cat)) +
  geom_point() +
  geom_line() + 
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Year", y = "Relative proportion", color = "Location") +
  facet_wrap(~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Radar plots

NEMMA

max_pc <- coral_fam_year_wide %>%
  filter(site_cat == "NEMMA") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains("Unknown spp.") &
           where(~ any(. >= 0.001))) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(coral_fam_year_wide %>%
                  filter(site_cat == "NEMMA") %>%
                  select(contains("_mean") & 
                           !contains("rel_mean") & 
                           !contains("Unknown spp.") &
                           where(~ any(. >= 0.001)))
)

coral_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        coral_fam_year_wide %>%
          filter(site_cat == "NEMMA") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean") & 
                   !contains("rel_mean") & 
                   !contains("Unknown spp.") &
                   where(~ any(. >= 0.001))) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

png("/Users/margaretwilson/Github/emc/agrra_monitoring/figs/coral_radar_NEMMA.png", width = 2000, height = 2000, res = 300)

# --- draw radar chart ---
radarchart(
  coral_rdr,
  pcol = line_colors,
  pfcol = fill_colors,
  cglcol = "grey"
)

# --- add legend ---
legend(
  x = "topright",
  legend = rownames(coral_rdr)[3:4],
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors
)

dev.off()
## quartz_off_screen 
##                 2
# radarchart(coral_rdr,
#            pcol = line_colors,
#            pfcol = fill_colors,
#            cglcol = "grey")
# 
# # doesn't like adding legend within rmarkdown - but can try within knit document
# legend(
#   x = "topright",
#   legend = rownames(coral_rdr)[3:4],  # skip max/min rows
#   col = line_colors,
#   lty = 1,
#   lwd = 2,
#   bty = "n",                   # no box
#   cex = 0.8,
#   pt.cex = 1.5,
#   fill = fill_colors            # show fill colors in legend
# )

NDNP

max_pc <- coral_fam_year_wide %>%
  filter(site_cat == "NDNP") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains("Unknown spp.") &
           where(~ any(. >= 0.001))) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(coral_fam_year_wide %>%
                  filter(site_cat == "NDNP") %>%
                  select(contains("_mean") & 
                           !contains("rel_mean") & 
                           !contains("Unknown spp.") &
                           where(~ any(. >= 0.001)))
)

coral_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        coral_fam_year_wide %>%
          filter(site_cat == "NDNP") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean") & 
                   !contains("rel_mean") & 
                   !contains("Unknown spp.") &
                   where(~ any(. >= 0.001))) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(coral_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(coral_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

Forest plots

Probably would show only NEMMA

# calculating paired changes per site, and 95% confidence intervals
coral_fam_site <- coral_spp_site %>%
  group_by(year, site_cat, site, family) %>%
  summarize(avg = mean(avg)) %>%
  filter(family != "Unknown spp.")

# --- compute paired deltas per site, then summarize per family × site_cat ---
df_change <- coral_fam_site %>%
  filter(year %in% c(2017, 2019, 2025)) %>%
  # pivot so each site has avg_2017, avg_2019, avg_2025 (NA where missing)
  pivot_wider(
    id_cols = c(site_cat, family, site),
    names_from = year,
    values_from = avg,
    names_glue = "avg_{year}"
  ) %>%
  # baseline = 2017 if present otherwise 2019
  mutate(
    baseline_value = coalesce(avg_2017, avg_2019)
  ) %>%
  # keep only sites that have both a baseline and 2025
  filter(!is.na(baseline_value), !is.na(avg_2025)) %>%
  # compute paired delta per site
  mutate(delta = avg_2025 - baseline_value) %>%
  # summarize to family × site_cat
  group_by(site_cat, family) %>%
  summarise(
    n_sites    = n(),
    mean_change = mean(delta, na.rm = TRUE),
    sd_change   = sd(delta, na.rm = TRUE),
    se_change   = sd_change / sqrt(n_sites),
    ci_low      = mean_change - 1.96 * se_change,
    ci_high     = mean_change + 1.96 * se_change,
    .groups = "drop"
  )

# quick diagnostic: show how many sites contributed per family × site_cat
print(df_change %>% arrange(site_cat, desc(n_sites)))
## # A tibble: 26 × 8
##    site_cat family     n_sites mean_change sd_change se_change   ci_low  ci_high
##    <chr>    <chr>        <int>       <dbl>     <dbl>     <dbl>    <dbl>    <dbl>
##  1 NDNP     Acroporid…      14  -0.0000904 0.000243  0.0000649 -2.17e-4  3.68e-5
##  2 NDNP     Agariciid…      14  -0.0000734 0.000179  0.0000478 -1.67e-4  2.03e-5
##  3 NDNP     Astrocoen…      14  -0.000614  0.000855  0.000228  -1.06e-3 -1.66e-4
##  4 NDNP     Faviidae        14  -0.0000245 0.000211  0.0000564 -1.35e-4  8.60e-5
##  5 NDNP     Meandrini…      14  -0.0000170 0.0000638 0.0000170 -5.04e-5  1.64e-5
##  6 NDNP     Merulinid…      14  -0.000257  0.000689  0.000184  -6.18e-4  1.04e-4
##  7 NDNP     Millepori…      14   0.000223  0.00136   0.000364  -4.91e-4  9.36e-4
##  8 NDNP     Montastra…      14  -0.00113   0.00359   0.000961  -3.01e-3  7.51e-4
##  9 NDNP     Oculinidae      14   0         0         0          0        0      
## 10 NDNP     Pocillopo…      14   0.0000960 0.000278  0.0000742 -4.95e-5  2.41e-4
## # ℹ 16 more rows
# --- forest plot: one point per family per panel (site_cat) ---
p <- ggplot(df_change %>% group_by(site_cat) %>% mutate(family = fct_reorder(family, mean_change)),
       aes(x = mean_change, y = family)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_pointrange(aes(xmin = ci_low, xmax = ci_high), size = 0.2) +
  facet_wrap(~ site_cat, scales = "free_y") +
  scale_x_continuous(labels = number_format(accuracy = 0.01), 
                     breaks = c(-0.01, 0.00, 0.01), 
                     limits = c(-0.02, 0.02)) +
  theme_bw(base_size = 12) +
  labs(
    x = "Change in percent cover",
    y = "Family")

ggsave(plot = p, here("agrra_monitoring", "figs", "coral_forest.png"), width = 8, height = 4)

Coral recruits

Data overview: - Historical: CoralRecruitsBySite.xlsx - family level - remove “DICH” code (not in 2025) - 2025: BenthicCoralRecruis.xlxs - species level - missing site-level standard deviation in data

Import + wrangle data

# using coral_meta_spp key generated in prev. section

# historical data

recruits_hist_sized <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralRecruits", "CoralRecruitsBySite.xlsx"), sheet = "Small") %>% 
  mutate (size = "small") %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralRecruits", "CoralRecruitsBySite.xlsx"), sheet = "Large") %>% 
          mutate (size = "large")) %>%
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(year %in% c(2017, 2019)) %>% # 2022 was incomplete NPA year
  pivot_longer(
    cols = matches(".*_(avg|std)$"),   # all columns ending in avg or std
    names_to = c("family_code", ".value"),        # split into species and measurement type
    names_sep = "_"
  ) %>%
  select(site, year, family_code, size, avg) 

# adding totals because above is only small vs. large
recruits_hist_tot <- recruits_hist_sized %>%
  group_by(site, year, family_code) %>%
  summarize(
    avg = sum(avg, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(size = "total") %>%
  select(site, year, family_code, size, avg) 

recruits_hist <- bind_rows(recruits_hist_tot, recruits_hist_sized)

# 2025 data - switch to "Overall" sheet #########

recruits_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralRecruits.xlsx"), sheet = "Overall") %>%
  bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoralRecruits.xlsx"), sheet = "Overall")) %>%
  rename_with(fix_names) %>%
  mutate(year = 2025) %>%
  rename(site = name) %>%
  select(year, site, matches("_(small|large|total)$")) %>%
  pivot_longer(
    cols = matches("_(small|large|total)$"),
    names_to = c("family_code", "size"),
    names_pattern = "^(.*?)_(small|large|total)$",
    values_to = "avg"
  ) %>%
  select(site, year, family_code, size, avg) %>%
  mutate(family_code = str_remove(family_code, "^t_"))
# no SD for 2025

# merge for complete df
recruits_fam_site <- bind_rows(recruits_hist, recruits_2025) %>%
  mutate(site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site),
         site = if_else(site == "Little Bird Patch", "Little Bird Backreef", site),
         family_code = toupper(family_code)) %>%
  left_join(coral_meta_spp %>%
              select(family_code, family) %>%
              distinct(), 
            by = "family_code") %>%
  mutate(family = if_else(family_code == "ALL", "All species", family)) %>%
  filter(family_code != "DICH") %>% # in historical data only, none present  
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()
  

recruits_fam_year <- recruits_fam_site %>%
  group_by(year, site_cat, family_code, family, size) %>%
  summarize(mean = mean(avg), 
            sd = sd(avg)) %>%
  group_by(year, site_cat) %>%
  mutate(rel_mean = mean / sum(mean)) %>%
  group_by(year, site_cat, family_code, family, size) %>%
  filter(sum(mean) > 0) %>% # removing species that were 0 in all years/site_cats
  ungroup()

Line plots by site

ggplot(recruits_fam_site %>%
         filter(family_code %in% c("ALL") & size == "total"), 
       aes(x = year, y = avg, color = site)) +
  geom_point() +
  geom_line() + 
  labs(x = "Year", y = "Recruit density (indv/m2)", color = "Location") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(recruits_fam_site %>%
         filter(family_code != "ALL" & size == "total"), 
       aes(x = year, y = avg, color = site)) +
  geom_point() +
  geom_line() + 
  labs(x = "Year", y = "Recruit density (indv/m2)", color = "Location") +
  facet_wrap(~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Box plots by year

recruits_tot_site <- recruits_fam_site %>%
  filter(family_code == "ALL" & size == "total")

p1 <- make_bp(recruits_tot_site, "NEMMA",
        yvar = "avg", ylab = expression("Coral recruits (indv. m"^{-2}*")"),
        y_max = 16,
        title = TRUE) +
  scale_y_continuous()

p2 <- make_bp(recruits_tot_site, "NDNP",
        yvar = "avg", ylab = expression("Coral recruits (indv. m"^{-2}*")"),
        y_max = 16,
        title = TRUE,
        remove_y_text = FALSE) +
  scale_y_continuous()

final_plot <- (p1 | p2)
final_plot

ggsave(here("agrra_monitoring", "figs", "recruits.png"), plot = final_plot, width = 6, height = 4)

Fish

Data overview:

Notes + questions:

  • To include commercially significant species, would have to go back to raw 2025 data or do custom calculations by family

Biomass

Data overview:

  • 2017: FishBiomassByTransect.xlsx
    • “Data” sheet has transect-level data by family and group
  • 2025: FishBiomassByTransect.xlsx
    • “Overall” sheet has transect-level data by family
    • Family sheets have transect-level data by species

Import + wrangle data

# create compatible family key by merging metadata
fish_fam_hist_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassBySite.xlsx"), sheet = "Key") %>% 
  clean_names() %>%
  slice(21:40) %>%
  select(code_com = 1,
         definition = 2) %>%
  mutate(family = str_remove(word(definition, 1), ":"),
         family = if_else(family == "Wrasse", "Wrasses", 
                          if_else(code_com == "PUFF", "Pufferfishes", family))) %>%
  select(-definition)

fish_fam_2025_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Metadata") %>% 
  clean_names() %>%
  slice(23:42) %>%
  select(code_lat = 2,
         definition = 3) %>%
  mutate(family = str_remove(word(definition, 1), ":")) %>%
  select(-definition)

fish_fam_key <- full_join(fish_fam_hist_temp, fish_fam_2025_temp, by = "family") %>%
  mutate(code_lat = tolower(gsub("^t", "", code_lat)),
         code_com = tolower(code_com)) %>%
  select(code_com, code_lat, family) %>%
  bind_rows(
    tibble(code_com = "t",
           code_lat = "all",
           family   = "Total"))

# import data
fish_bm_hist_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassBySite.xlsx"), sheet = "Data") %>% 
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  select(site, year, contains("avg"), contains("std")) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_com", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_com") %>%
  select(site, year, code = code_lat, family, stat, value)

fish_bm_2025_temp <- rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Overall"),
                      read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Overall")) %>%
  # deal with weird capitalization in species col names
  rename_with(fix_names) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  mutate(year = 2025,
         site = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
  select(site, year, contains("avg"), contains("std")) %>%
  rename_with(~ gsub("^t_", "", .x), .cols = -c(site, year)) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_lat", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_lat") %>%
  select(site, year, code = code_lat, family, stat, value)

# merge datasets
fish_bm_site_temp <- rbind(fish_bm_hist_temp, fish_bm_2025_temp) %>%
 mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA site inconsistencies
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  mutate(year = if_else(year == 2024, 2025, year)) %>% # one erroneous 2024 date
  filter(year != 2022,
         !is.na(family)) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
  ungroup()

herb_bm_site_temp <- fish_bm_site_temp %>%
  filter(code %in% c("scar", "acan")) %>%
  group_by(site, site_cat, year, stat) %>%
  summarise(
    value = if_else(stat == "avg", sum(value, na.rm = TRUE), 
                    sqrt(sum(value^2, na.rm = TRUE))), # for std
    .groups = "drop"
  ) %>%
  mutate(family = "Herbivores", code = "herb") %>%
  distinct()

fish_bm_site <- rbind(fish_bm_site_temp, herb_bm_site_temp) %>%
  pivot_wider(
    names_from = stat,   # use 'avg' and 'std' as new column names
    values_from = value  # fill those columns with the numeric values
  ) %>%
  unnest(c(avg, std)) # make numeric instead of list
# wide version in benthic ~ fish section

fish_bm_year <- fish_bm_site %>%
  group_by(year, site_cat, family, code) %>%
  summarize(mean = mean(avg),
            std = sd(avg)) %>%
  # don't actually need this complete() here, but including as precaution
  group_by(year, site_cat) %>%
  complete(
    family = unique(fish_bm_site$family),
    fill = list(mean = 0)) %>% # list(mean = 0, rel_mean = 0)
  ungroup()

# wide format for graphing purposes
fish_bm_year_wide <- fish_bm_year %>%
  select(-code) %>%
  pivot_wider(names_from = family,
              values_from = c(mean, std),
              names_glue = "{family}_{.value}")

# clean environment and export
# rm(list = ls(pattern = "_temp$"))
write.csv(fish_bm_year, here("agrra_monitoring", "data_outputs", "fish_bm_year.csv"))

Line plots

ggplot(fish_bm_site %>% filter(code == "all"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Total fish biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_site %>% filter(code == "scar"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Scarid biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_site %>% filter(code == "acan"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Acanthurid biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_site %>% filter(code == "serr"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Serranid biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_year, aes(x = year, y = mean, group = site_cat, color = site_cat)) +
  geom_point() +
  geom_line() + 
  labs(x = "Year", y = "Biomass", color = "Location") +
  facet_wrap(~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Radar plots

NEMMA

data <- fish_bm_year_wide %>%
  filter(site_cat == "NEMMA") %>%
  column_to_rownames(var = "year") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains(c("Total", "Herbivores")) &
           where(~ any(. >= 100))) %>%
  rename_with(~ gsub("_mean", "", .x))

max <- data %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol <- ncol(data)

rdr <- 
  rbind(rep(max, ncol), 
        rep(0, ncol), 
        data)
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

NDNP

data <- fish_bm_year_wide %>%
  filter(site_cat == "NDNP") %>%
  column_to_rownames(var = "year") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains(c("Total", "Herbivores")) &
           where(~ any(. >= 100))) %>% # at least one mean bm value must be over 100
  rename_with(~ gsub("_mean", "", .x))

max <- data %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol <- ncol(data)

rdr <- 
  rbind(rep(max, ncol), 
        rep(0, ncol), 
        data)
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

legend(
  x = "topright",
  legend = rownames(rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

Density

Import + wrangle data

# import data
fish_den_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishDensity", "FishDensityBySite.xlsx"), sheet = "Data") %>% 
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  select(site, year, contains("avg"), contains("std")) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_com", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_com") %>%
  select(site, year, code = code_lat, family, stat, value)

fish_den_2025 <- rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishDensity.xlsx"), sheet = "Overall"),
                      read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishDensity.xlsx"), sheet = "Overall")) %>%
  # deal with weird capitalization in species col names
  rename_with(fix_names) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  mutate(year = 2025,
         site = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
  select(site, year, contains("avg"), contains("std")) %>%
  rename_with(~ gsub("^t_", "", .x), .cols = -c(site, year)) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_lat", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_lat") %>%
  select(site, year, code = code_lat, family, stat, value)

# merge datasets
fish_den_site <- rbind(fish_den_hist, fish_den_2025) %>%
 mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA site inconsistencies
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  mutate(year = if_else(year == 2024, 2025, year)) %>% # one erroneous 2024 date
  filter(year != 2022,
         !is.na(family)) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
  ungroup() %>%
  pivot_wider(
    names_from = stat,   # use 'avg' and 'std' as new column names
    values_from = value  # fill those columns with the numeric values
  ) %>%
  unnest(c(avg, std)) # make numeric instead of list

Forest plots

# calculating paired changes per site, and 95% confidence intervals

# --- compute paired deltas per site, then summarize per family × site_cat ---
df_change <- fish_den_site %>%
  filter(year %in% c(2017, 2019, 2025)) %>%
  # pivot so each site has avg_2017, avg_2019, avg_2025 (NA where missing)
  pivot_wider(
    id_cols = c(site_cat, family, site),
    names_from = year,
    values_from = avg,
    names_glue = "avg_{year}"
  ) %>%
  # baseline = 2017 if present otherwise 2019
  mutate(
    baseline_value = coalesce(avg_2017, avg_2019)
  ) %>%
  # keep only sites that have both a baseline and 2025
  filter(!is.na(baseline_value), !is.na(avg_2025)) %>%
  # compute paired delta per site
  mutate(delta = avg_2025 - baseline_value) %>%
  # summarize to family × site_cat
  group_by(site_cat, family) %>%
  summarise(
    n_sites    = n(),
    mean_change = mean(delta, na.rm = TRUE),
    sd_change   = sd(delta, na.rm = TRUE),
    se_change   = sd_change / sqrt(n_sites),
    ci_low      = mean_change - 1.96 * se_change,
    ci_high     = mean_change + 1.96 * se_change,
    .groups = "drop"
  )

# quick diagnostic: show how many sites contributed per family × site_cat
print(df_change %>% arrange(site_cat, desc(n_sites)))
## # A tibble: 42 × 8
##    site_cat family       n_sites mean_change sd_change se_change  ci_low ci_high
##    <chr>    <chr>          <int>       <dbl>     <dbl>     <dbl>   <dbl>   <dbl>
##  1 NDNP     Angelfishes       14    -0.285       0.391    0.105  -0.490  -0.0804
##  2 NDNP     Barracudas        14     0.00516     0.127    0.0340 -0.0614  0.0717
##  3 NDNP     Boxfishes         14    -0.106       0.149    0.0397 -0.184  -0.0281
##  4 NDNP     Butterflyfi…      14    -1.42        1.47     0.393  -2.19   -0.647 
##  5 NDNP     Chubs             14    -0.0643      0.214    0.0571 -0.176   0.0477
##  6 NDNP     Damselfishes      14    -0.475       0.672    0.180  -0.828  -0.123 
##  7 NDNP     Filefishes        14    -0.348       0.392    0.105  -0.554  -0.143 
##  8 NDNP     Groupers          14    -6.64        2.85     0.762  -8.14   -5.15  
##  9 NDNP     Grunts            14    -3.97        5.65     1.51   -6.93   -1.02  
## 10 NDNP     Jacks             14    -0.151       0.777    0.208  -0.558   0.256 
## # ℹ 32 more rows
# --- forest plot: one point per family per panel (site_cat) ---
p <- ggplot(df_change %>% group_by(site_cat) %>% mutate(family = fct_reorder(family, mean_change)),
       aes(x = mean_change, y = family)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_pointrange(aes(xmin = ci_low, xmax = ci_high), size = 0.2) +
  facet_wrap(~ site_cat, scales = "free_y") +
  theme_bw(base_size = 12) +
  labs(
    x = expression("Change in density (indv. 100m"^{-2}*")"),
    y = "Family")

ggsave(plot = p, here("agrra_monitoring", "figs", "fish_den_forest.png"), width = 6, height = 4)

Length

Data overview:

  • 2017-2022: FishSizeFreqByTransect.xlsx (binned data only)
    • NF (number of fish) + raw numbers and % in each size bin
    • % is out of 100
    • Everything above 40cm gets aggregated
    • One sheet per family has transect-level size data
  • 2025: FishSizeFreqByTransect.xlsx (binned data only)
    • NF (number of fish) + % in each size bin
    • % is out of one (proportion)
    • Over 40cm, bins are by 10cm to the nearest 10cm until <200cm
    • “Overall” sheet has transect-level size data
    • Family sheets have data by family

Notes + questions:

  • Dealing with >40cm fish given aggregation into one group in historical data? Need to check raw data
  • Correct to exclude transects with no fish and include in mean size calculations?
  • Weighted means: calculate midpoint representation of each size bin and weight by the number of counts per bin

Scarids

Import + wrangle data

# starting with scarids only as mock-up
scar_length_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishSizeFreq", "FishSizeFreqByTransect.xlsx"), sheet = "PARR") %>%
  clean_names() %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date),
         across(
    .cols = contains("percent_"),
    .fns  = ~ .x / 100)) %>%
  select(site, year, transect = trans, nf, contains("percent_")) %>%
  rename_with(~ str_remove_all(.x, "percent_")) %>%
  rename("41_up" = "40") %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSCAR") %>%
          clean_names() %>%
          mutate(year = 2025,
                 survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>% # select only columns with digits (e.g., 50, 60) and sum across these to be consistent with 2017 data, should not affect most of our fish families of interest
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSCAR") %>%
          clean_names() %>%
          mutate(year = 2025) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>%
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site, year) %>%
  filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

# check that all transects are present even if no fish per family were recorded - should have 0 entries if all is well
# chk <- full_join(scar_length_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  fish_bm_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  by = c("year", "site"), suffix = c("_length", "_bm")) %>%
#   filter(count_length != count_bm | is.na(count_length) | is.na(count_bm))

# transform data for summaries
scar_length_long <- scar_length_trans %>%
  filter(nf != 0) %>%
  pivot_longer(
    cols = matches("^\\d+_?\\d?"), 
    names_to = "length_mid", 
    values_to = "percent"
  ) %>%
  mutate(
    # convert bin names like "0_5" -> 2.5
    length_mid = str_extract(length_mid, "\\d+_?\\d*"),
    length_mid = ifelse(str_detect(length_mid, "_"),
                        sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                        as.numeric(length_mid))) %>%
  group_by(year, site, transect) %>%
  mutate(prop = percent / sum(percent)) %>% # accounts for small deviations where total proportions don't equal exactly 1
  ungroup() %>%
  select(-percent)

scar_length_site <- scar_length_long %>%
  group_by(year, site, site_cat, length_mid) %>%
  summarise(mean_prop = mean(prop), .groups = "drop")

scar_length_year <- scar_length_site %>%
  group_by(year, site_cat, length_mid) %>%
  summarise(mean_prop = mean(mean_prop), .groups = "drop")

Density plots

ggplot(scar_length_year %>% 
         mutate(year = as.character(year)), 
       aes(x = length_mid, weight = mean_prop, group = year, color = year, fill = year)) +
  geom_density(alpha = 0.3, adjust = 1) +
  facet_wrap(~ site_cat, ncol = 2) +
  labs(x = "Fish length (cm) - Scaridae", y = "Density") +
  theme_minimal()

Chi-square tests

# transform data for summaries
scar_length_long_binned <- scar_length_trans %>%
  filter(nf != 0) %>%
  pivot_longer(
    cols = matches("^\\d+_?\\d?"), 
    names_to = "size_bin", 
    values_to = "percent"
  ) %>%
  rename(nf_tot = nf) %>%
  mutate(nf_bin = nf_tot * percent)

scar_counts <- scar_length_long_binned %>%
  mutate(size_bin = case_when(
    size_bin %in% c("21_30", "31_40", "41_up") ~ "21_up",
    TRUE ~ size_bin)) %>%
  group_by(year, site_cat, size_bin) %>%
  summarise(count = sum(nf_bin), .groups = "drop")
NEMMA
nemma_table <- scar_counts %>%
  filter(site_cat == "NEMMA") %>%
  select(year, size_bin, count) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0)

chisq_result_nemma <- chisq.test(as.matrix(nemma_table[,-1]))
chisq_result_nemma
## 
##  Pearson's Chi-squared test
## 
## data:  as.matrix(nemma_table[, -1])
## X-squared = 81.011, df = 3, p-value < 2.2e-16
NDNP
ndnp_table <- scar_counts %>%
  filter(site_cat == "NDNP") %>%
  select(year, size_bin, count) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0)

chisq_result_ndnp <- chisq.test(as.matrix(ndnp_table[,-1]))
chisq_result_ndnp
## 
##  Pearson's Chi-squared test
## 
## data:  as.matrix(ndnp_table[, -1])
## X-squared = 15.239, df = 3, p-value = 0.001623

Mosaic plots

tab <- scar_counts %>%
  filter(site_cat == "NEMMA") %>%
  mutate(size_bin = factor(size_bin, levels = c("0_5","6_10","11_20","21_up"))) %>%
  arrange(size_bin) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0) %>%
  select(-site_cat)

# convert to matrix, drop the size_bin column
mat <- as.matrix(tab %>% select(-size_bin))
row.names(mat) <- tab$size_bin

mosaicplot(mat, 
           color = TRUE,           # color each year
           main = paste("Fish size distribution by year —", "NEMMA"),
           xlab = "Size Bin",
           ylab = "Year",
           las = 1)                # rotate y-axis labels

tab <- scar_counts %>%
  filter(site_cat == "NDNP") %>%
  mutate(size_bin = factor(size_bin, levels = c("0_5","6_10","11_20","21_up"))) %>%
  arrange(size_bin) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0) %>%
  select(-site_cat)

# convert to matrix, drop the size_bin column
mat <- as.matrix(tab %>% select(-size_bin))
row.names(mat) <- tab$size_bin

mosaicplot(mat, 
           color = TRUE,           # color each year
           main = paste("Fish size distribution by year —", "NDNP"),
           xlab = "Size Bin",
           ylab = "Year",
           las = 1)                # rotate y-axis labels

Phase changes

Currently only 2025 data, because appears that 2017/2019 have no phase data…?

# raw historical data

fish_tax_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "FishTaxonomy", skip = 1, col_names = TRUE) %>% # deals with merged headers
  clean_names() %>%
  unite(species, genus, species, sep = " ", remove = TRUE)

fish_raw_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-FishRaw.xlsx"), sheet = "Counts Raw") %>%
  clean_names() %>%
  left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Transect") %>%
              clean_names() %>%
              rename(transect = id, date = surveyed) %>%
              left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Survey") %>%
                          clean_names() %>%
                          select(survey = id, site = name))) %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date),
         size_class = str_replace_all(size_class, "-", "_"),
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         # convert bin names like "0_5" -> 2.5
         length_mid = str_extract(size_class, "\\d+_?\\d*"),
         length_mid = ifelse(str_detect(length_mid, "_"),
                             sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                             as.numeric(length_mid))) %>%
  left_join(fish_tax_hist, by = c("taxonomy" = "id")) %>%
  select(year, site, site_cat, trans = transect, size_class, length_mid, common_name, common_family, family, species) # no terminal phase in this data?

# raw 2025 data

fish_tax_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "FishTaxonomy", skip = 1, col_names = TRUE) %>% # deals with merged headers
  clean_names() %>%
  unite(species, genus, species, sep = " ", remove = TRUE)
# could replace NA species with 'spp.' before uniting

chk <- fish_tax_hist %>% # make sure taxonomic lookups are consistent
  full_join(fish_tax_2025, by = "id", suffix = c("_hist", "_2025")) %>%
  mutate(match = species_hist == species_2025)

fish_raw_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "FishRaw.xlsx"), sheet = "Counts Raw") %>%
  clean_names() %>%
  left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "Transect") %>%
              clean_names() %>%
              select(transect = id, survey, date = surveyed) %>%
              left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "Survey") %>%
                          clean_names() %>%
                          select(survey = id, site = name))) %>%
  bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "FishRaw.xlsx"), sheet = "Counts Raw") %>%
              clean_names() %>%
              left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "Metadata.xlsx"), sheet = "Transect") %>%
                          clean_names() %>%
                          select(transect = id, survey, date = surveyed) %>%
                          left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "Metadata.xlsx"), sheet = "Survey") %>%
                                      clean_names() %>%
                                      select(survey = id, site = name)))
  ) %>%
  clean_names() %>%
  mutate(year = year(date),
         size_class = str_replace_all(size_class, " - ", "_"),
         size_class = str_replace_all(size_class, "cm", ""),
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         # convert bin names like "0_5" -> 2.5
         length_mid = str_extract(size_class, "\\d+_?\\d*"),
         length_mid = ifelse(str_detect(length_mid, "_"),
                             sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                             as.numeric(length_mid))) %>%
  left_join(fish_tax_2025, by = c("taxonomy" = "id")) %>%
  select(year, site, site_cat, trans = transect, size_class, length_mid, common_name, common_family, family, species, terminal_phase)

# merge all years

# fish_raw <- bind_rows(fish_raw_hist, fish_raw_2025)

# scarids only

scarids <- fish_raw_2025 %>%
  filter(species %in% c("Sparisoma viride", "Sparisoma aurofrenatum", "Scarus vetula", "Scarus iseri")) %>%
  mutate(sex = case_when(terminal_phase == "No" ~ "Female",
                         terminal_phase == "Yes" ~ "Male"),
         size_class = factor(size_class, levels = c("0_5","6_10","11_20","21_30", "31_40", "41_50")))

ggplot(scarids, aes(x = length_mid, fill = sex)) +
  geom_histogram(stat = "count", alpha = 0.8, position = position_dodge()) +
  facet_wrap(vars(species), ncol = 4) +
  geom_vline(data = filter(scarids, species == "Sparisoma viride"), aes(xintercept = 18, color = "Minimum length at maturity"), linetype = "dashed") +
  geom_vline(data = filter(scarids, species == "Scarus vetula"), aes(xintercept = 19, color = "Minimum length at maturity"), linetype = "dashed") +
  geom_vline(data = filter(scarids, species == "Sparisoma aurofrenatum"), aes(xintercept = 15, color = "Minimum length at maturity"), linetype = "dashed") +
  geom_vline(data = filter(scarids, species == "Scarus iseri"), aes(xintercept = 19, color = "Minimum length at maturity"), linetype = "dashed") +
  scale_color_manual(values = c("black")) +
  scale_fill_manual(values=c("pink2", "turquoise3")) +
  theme_bw() +
  labs(x = "Fish length (cm)", y = "Number observed", fill="") +
  theme(strip.text = element_text(face = "italic"),
        legend.title = element_blank(),
        legend.position = "top",
        axis.text.x = element_text(angle = 45, hjust = 1))

Serranids

Import + wrangle data

# starting with scarids only as mock-up
serr_length_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishSizeFreq", "FishSizeFreqByTransect.xlsx"), sheet = "GROU") %>%
  clean_names() %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date),
         across(
    .cols = contains("percent_"),
    .fns  = ~ .x / 100)) %>%
  select(site, year, transect = trans, nf, contains("percent_")) %>%
  rename_with(~ str_remove_all(.x, "percent_")) %>%
  rename("41_up" = "40") %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSERR") %>%
          clean_names() %>%
          mutate(year = 2025,
                 survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>% # select only columns with digits (e.g., 50, 60) and sum across these to be consistent with 2017 data, should not affect most of our fish families of interest
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSERR") %>%
          clean_names() %>%
          mutate(year = 2025) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>%
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site, year) %>%
  filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

# check that all transects are present even if no fish per family were recorded - should have 0 entries if all is well
# chk <- full_join(serr_length_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  fish_bm_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  by = c("year", "site"), suffix = c("_length", "_bm")) %>%
#   filter(count_length != count_bm | is.na(count_length) | is.na(count_bm))

# transform data for summaries
serr_length_long <- serr_length_trans %>%
  filter(nf != 0) %>%
  pivot_longer(
    cols = matches("^\\d+_?\\d?"), 
    names_to = "length_mid", 
    values_to = "percent"
  ) %>%
  mutate(
    # convert bin names like "0_5" -> 2.5
    length_mid = str_extract(length_mid, "\\d+_?\\d*"),
    length_mid = ifelse(str_detect(length_mid, "_"),
                        sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                        as.numeric(length_mid))) %>%
  group_by(year, site, transect) %>%
  mutate(prop = percent / sum(percent)) %>% # accounts for small deviations where total proportions don't equal exactly 1
  ungroup() %>%
  select(-percent)

serr_length_site <- serr_length_long %>%
  group_by(year, site, site_cat, length_mid) %>%
  summarise(mean_prop = mean(prop), .groups = "drop")

serr_length_year <- serr_length_site %>%
  group_by(year, site_cat, length_mid) %>%
  summarise(mean_prop = mean(mean_prop), .groups = "drop")

Density plots

ggplot(serr_length_year %>% 
         mutate(year = as.character(year)), 
       aes(x = length_mid, weight = mean_prop, group = year, color = year, fill = year)) +
  geom_density(alpha = 0.3, adjust = 1) +
  facet_wrap(~ site_cat, ncol = 2) +
  labs(x = "Fish length (cm) - Serranidae", y = "Density") +
  theme_minimal()

Benthic ~ Fish

Integrate datasets

benth_fish_site <- fish_bm_site %>%
  select(-family) %>%
  rename(mean = avg) %>%
  pivot_wider(names_from = code,
              values_from = c(mean, std),
              names_glue = "{code}_{.value}") %>%
  left_join(benthic_cover_site, by = c("site", "site_cat", "year"))

Exploratory models and plots

No statistically significant relationships found (including when scarid and acanthurids are aggregated)

mod <- lm(fma_mean ~ scar_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = fma_mean ~ scar_mean, data = benth_fish_site)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23116 -0.11016 -0.01080  0.08115  0.38401 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.083e-01  3.988e-02   7.730 3.71e-09 ***
## scar_mean   -3.646e-05  2.748e-05  -1.327    0.193    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1393 on 36 degrees of freedom
## Multiple R-squared:  0.04664,    Adjusted R-squared:  0.02016 
## F-statistic: 1.761 on 1 and 36 DF,  p-value: 0.1928
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = scar_mean, y = fma_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Scarid biomass", y = "FMA cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

mod <- lm(fma_mean ~ herb_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = fma_mean ~ herb_mean, data = benth_fish_site)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20618 -0.10757 -0.01420  0.07757  0.41048 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.203e-01  4.543e-02   7.051 2.82e-08 ***
## herb_mean   -2.266e-05  1.607e-05  -1.410    0.167    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1389 on 36 degrees of freedom
## Multiple R-squared:  0.05235,    Adjusted R-squared:  0.02602 
## F-statistic: 1.989 on 1 and 36 DF,  p-value: 0.1671
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = herb_mean, y = fma_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Herbivore biomass", y = "FMA cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

mod <- lm(lc_mean ~ scar_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = lc_mean ~ scar_mean, data = benth_fish_site)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.05273 -0.02691 -0.01161  0.01497  0.15619 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.675e-02  1.209e-02   3.869 0.000442 ***
## scar_mean   6.717e-06  8.326e-06   0.807 0.425141    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04221 on 36 degrees of freedom
## Multiple R-squared:  0.01776,    Adjusted R-squared:  -0.009529 
## F-statistic: 0.6508 on 1 and 36 DF,  p-value: 0.4251
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = scar_mean, y = lc_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Scarid biomass", y = "Live coral cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

mod <- lm(lc_mean ~ herb_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = lc_mean ~ herb_mean, data = benth_fish_site)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.045407 -0.028903 -0.009291  0.014008  0.156349 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.741e-02  1.392e-02   4.124  0.00021 ***
## herb_mean   -1.069e-06  4.924e-06  -0.217  0.82931    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04256 on 36 degrees of freedom
## Multiple R-squared:  0.001308,   Adjusted R-squared:  -0.02643 
## F-statistic: 0.04716 on 1 and 36 DF,  p-value: 0.8293
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = herb_mean, y = lc_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Herbivore biomass", y = "Live coral cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

Correlation tests

Testing correlation between scarid biomass and benthic variables - nothing is notably strong

benth_vars <- benth_fish_site %>%
  select(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean, cyan_mean, ainv_mean, peys_mean)

cor_results <- cor(benth_vars, benth_fish_site$scar_mean, use = "pairwise.complete.obs")
cor_results
##                  [,1]
## lc_mean    0.13324977
## cca_mean   0.38089582
## ta_mean    0.21085969
## tas_mean  -0.18002397
## fma_mean  -0.21595995
## cyan_mean  0.25669404
## ainv_mean  0.36476851
## peys_mean -0.01637593

Multivariate tests (PERMANOVA)

benth_mat <- benth_fish_site %>%
  select(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean)

adonis2(benth_mat ~ scar_mean + site_cat + year,
        data = benth_fish_site,
        method = "bray",
        permutations = 999,
        by = "term")
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = benth_mat ~ scar_mean + site_cat + year, data = benth_fish_site, permutations = 999, method = "bray", by = "term")
##           Df SumOfSqs      R2       F Pr(>F)    
## scar_mean  1  0.15087 0.05318  5.3525  0.005 ** 
## site_cat   1  1.13209 0.39906 40.1633  0.001 ***
## year       1  0.59559 0.20994 21.1298  0.001 ***
## Residual  34  0.95837 0.33782                   
## Total     37  2.83693 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
res <- adonis2(benth_mat ~ scar_mean + site_cat + year,
               data = benth_fish_site,
               method = "bray",
               permutations = 999,
               by = "term")

res_df <- broom::tidy(res)

ggplot(res_df, aes(x = reorder(term, R2), y = R2)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(x = "Predictor", y = "Proportion of variation explained (R²)") +
  theme_bw()

Archive

access_meta_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Transect") %>%
  clean_names() %>% 
  left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Survey") %>%
              clean_names() %>%
              rename(survey = id)) %>%
  select(transect = id, date = surveyed, site = name)
  


# checking percent that are unknown
chk <- coral_spp_hist %>%
  mutate(category = if_else(species == "unkn", "unknown", "known")) %>%
  group_by(site, year, category) %>%
  summarize(cover = sum(avg))

# old biomass import code - holding for now
fish_bm_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassByTransect.xlsx"), sheet = "Data") %>% 
  clean_names() %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date)) %>% 
  select(site, year, transect = trans, t_tot = t, t_scar = parr, t_acan = surg, t_serr = grou) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomassByTransect.xlsx"), sheet = "Overall") %>% 
          clean_names() %>%
          mutate(year = year(surveyed),
                 survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>% # inconsistency in site name between 2017 and 2025
          select(site = survey_name, year, transect = transect_name, t_tot = all, t_scar, t_acan, t_serr)) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishBiomassByTransect.xlsx"), sheet = "Overall") %>% 
          clean_names() %>%
          mutate(year = year(surveyed)) %>%
          select(site = survey_name, year, transect = transect_name, t_tot = all, t_scar, t_acan, t_serr)) %>%
  mutate(t_herb = t_scar + t_acan,
         site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  mutate(year = if_else(year == 2024, 2025, year)) %>% # Site 11 had one erroneous 2024 date
  filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site, year) %>%
  filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
  ungroup()

fish_bm_site <- fish_bm_trans %>%
  group_by(site, site_cat, year) %>%
  rename_with(~ str_remove(.x, "t_")) %>%
  summarise(across(c(tot, herb, scar, acan, serr),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        se   = ~ se(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

fish_bm_year <- fish_bm_site %>%
  select(!contains("_se")) %>%
  rename_with(~ str_remove(.x, "_mean")) %>%
  group_by(site_cat, year) %>%
  summarise(across(c(tot, herb, scar, acan, serr),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        se   = ~ se(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

# importing and merging coral spp sheets for 2025
sheets <- excel_sheets(file_coral)
sheets_to_join <- setdiff(sheets, c("TermsOfUse", "Metadata", "Overall")) # exclude two sheets before merging remainder
sheets_list <- map(sheets_to_join, ~ read_excel(file_coral, sheet = .x) %>%
                     rename_with(~ gsub("([A-Za-z]{4})(avg|med|std)$", "\\1_\\2", .x)) %>% 
                     rename_with(~ gsub("^(t)([A-Za-z]{4})(ave|med|std)$", "\\1_\\2_\\3", .x)) %>% 
                     clean_names())

coral_spp_2025 <- reduce(sheets_list, left_join, by = c("survey_id", "code", "name", "nt"))


# benthic ~ fish by year
benth_fish_year <- benth_fish_site %>%
  select(-contains("_se")) %>%
  rename_with(~ str_remove(.x, "_mean$")) %>%
  group_by(year, site_cat) %>%
  summarise(
    across(
      where(is.numeric),
      list(mean = ~mean(.x, na.rm = TRUE),
           se   = ~se(.x)),
      .names = "{.col}_{.fn}"
    ),
    .groups = "drop"
  )